{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the JSON Lines file (one record per line) into a DataFrame.\n",
    "filepath = 'test_data/user_following_animation.json'\n",
    "data = pd.read_json(filepath, lines=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column names to assign to the CSV fields, in file order.\n",
    "user_info_columns = [\n",
    "    'id', 'mid', 'name', 'sex', 'sign', 'the_rank',\n",
    "    'level', 'jointime', 'moral', 'silence', 'birthday', 'coins',\n",
    "    'fans_badge', 'role', 'title', 'desc', 'vip_type', 'vip_status',\n",
    "]\n",
    "user_info = pd.read_csv('test_data/bilibili_crawler_user_info.csv', names=user_info_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>mid</th>\n",
       "      <th>name</th>\n",
       "      <th>sex</th>\n",
       "      <th>sign</th>\n",
       "      <th>the_rank</th>\n",
       "      <th>level</th>\n",
       "      <th>jointime</th>\n",
       "      <th>moral</th>\n",
       "      <th>silence</th>\n",
       "      <th>birthday</th>\n",
       "      <th>coins</th>\n",
       "      <th>fans_badge</th>\n",
       "      <th>role</th>\n",
       "      <th>title</th>\n",
       "      <th>desc</th>\n",
       "      <th>vip_type</th>\n",
       "      <th>vip_status</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>17515174</td>\n",
       "      <td>试图让群主女装</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>02-24</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>7596516</td>\n",
       "      <td>酒客小丑</td>\n",
       "      <td>NaN</td>\n",
       "      <td>沉迷于吃狗粮中。商业合作请加QQ2375432747  微博：@酒客小丑</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>06-15</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>bilibili 2018百大UP主、高能联盟成员</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>10769575</td>\n",
       "      <td>文史阁</td>\n",
       "      <td>NaN</td>\n",
       "      <td>地势坤，君子以厚德载物。</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>01-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>bilibili 知名科普UP主</td>\n",
       "      <td>代表作：中国历代帝王合集</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>22989278</td>\n",
       "      <td>果冻萌萌的</td>\n",
       "      <td>NaN</td>\n",
       "      <td>林正英以及香港经典鬼片僵尸片24小时播放 房间号：193  欢迎大家观看</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>09-17</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>bilibili 知名UP主</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>130050712</td>\n",
       "      <td>初阳手绘石头</td>\n",
       "      <td>NaN</td>\n",
       "      <td>如果搜不到我，说明你是在查找你的好友里搜，当然搜不到了。因为我还不是你的好友呢，你要在添＋好...</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>01-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  id        mid     name  sex  \\\n",
       "0  1   17515174  试图让群主女装  NaN   \n",
       "1  2    7596516     酒客小丑  NaN   \n",
       "2  3   10769575      文史阁  NaN   \n",
       "3  4   22989278    果冻萌萌的  NaN   \n",
       "4  5  130050712   初阳手绘石头  NaN   \n",
       "\n",
       "                                                sign  the_rank level  \\\n",
       "0                                                NaN   10000.0     5   \n",
       "1               沉迷于吃狗粮中。商业合作请加QQ2375432747  微博：@酒客小丑   10000.0     6   \n",
       "2                                       地势坤，君子以厚德载物。   10000.0     6   \n",
       "3               林正英以及香港经典鬼片僵尸片24小时播放 房间号：193  欢迎大家观看   10000.0     6   \n",
       "4  如果搜不到我，说明你是在查找你的好友里搜，当然搜不到了。因为我还不是你的好友呢，你要在添＋好...   10000.0     5   \n",
       "\n",
       "   jointime  moral  silence birthday  coins  fans_badge  role  \\\n",
       "0       0.0    0.0      0.0    02-24    0.0         0.0   0.0   \n",
       "1       0.0    0.0      0.0    06-15    0.0         1.0   1.0   \n",
       "2       0.0    0.0      0.0    01-01    0.0         0.0   1.0   \n",
       "3       0.0    0.0      0.0    09-17    0.0         1.0   1.0   \n",
       "4       0.0    0.0      0.0    01-01    0.0         1.0   0.0   \n",
       "\n",
       "                       title          desc  vip_type  vip_status  \n",
       "0                        NaN           NaN       2.0         1.0  \n",
       "1  bilibili 2018百大UP主、高能联盟成员           NaN       2.0         1.0  \n",
       "2           bilibili 知名科普UP主  代表作：中国历代帝王合集       0.0         0.0  \n",
       "3             bilibili 知名UP主           NaN       1.0         0.0  \n",
       "4                        NaN           NaN       2.0         1.0  "
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first rows of the loaded user-info table.\n",
    "user_info.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## user_info 预处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>mid</th>\n",
       "      <th>name</th>\n",
       "      <th>sex</th>\n",
       "      <th>sign</th>\n",
       "      <th>the_rank</th>\n",
       "      <th>level</th>\n",
       "      <th>jointime</th>\n",
       "      <th>moral</th>\n",
       "      <th>silence</th>\n",
       "      <th>birthday</th>\n",
       "      <th>coins</th>\n",
       "      <th>fans_badge</th>\n",
       "      <th>role</th>\n",
       "      <th>title</th>\n",
       "      <th>desc</th>\n",
       "      <th>vip_type</th>\n",
       "      <th>vip_status</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2534</th>\n",
       "      <td>2535</td>\n",
       "      <td>5398071</td>\n",
       "      <td>吾谶</td>\n",
       "      <td>NaN</td>\n",
       "      <td>CP可以逆  本命不能移</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2535</th>\n",
       "      <td>NaN</td>\n",
       "      <td>10000</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10-16</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21457</th>\n",
       "      <td>21457</td>\n",
       "      <td>177589181</td>\n",
       "      <td>哔哩哔哩安全应急响应中心</td>\n",
       "      <td>NaN</td>\n",
       "      <td>&gt;&lt;img src=1 onerror=(alert)(2233)&gt;,10000,3,0,0...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21458</th>\n",
       "      <td>定期搬运各种科普游戏视频，欢迎关注；想来我们不正规字幕组或唠嗑的小伙伴，请加入交流QQ群：7...</td>\n",
       "      <td>10000</td>\n",
       "      <td>6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>05-05</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39013</th>\n",
       "      <td>39020</td>\n",
       "      <td>77818</td>\n",
       "      <td>雾香·楼兰</td>\n",
       "      <td>NaN</td>\n",
       "      <td>之前账号：科洛丝琳希停用</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39014</th>\n",
       "      <td>NaN</td>\n",
       "      <td>10000</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44168</th>\n",
       "      <td>44174</td>\n",
       "      <td>402348056</td>\n",
       "      <td>苍蓝誓约手游</td>\n",
       "      <td>NaN</td>\n",
       "      <td>初次见面！您...就是指挥官大人吗？emmm，跟想象中不太一样呢~总之，请多多指教喽，指挥官...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44169</th>\n",
       "      <td>最近迷上做神踩点视频\"</td>\n",
       "      <td>10000</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>03-15</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      id        mid  \\\n",
       "2534                                                2535    5398071   \n",
       "2535                                                 NaN      10000   \n",
       "21457                                              21457  177589181   \n",
       "21458  定期搬运各种科普游戏视频，欢迎关注；想来我们不正规字幕组或唠嗑的小伙伴，请加入交流QQ群：7...      10000   \n",
       "39013                                              39020      77818   \n",
       "39014                                                NaN      10000   \n",
       "44168                                              44174  402348056   \n",
       "44169                                        最近迷上做神踩点视频\"      10000   \n",
       "\n",
       "               name  sex                                               sign  \\\n",
       "2534             吾谶  NaN                        CP可以逆  本命不能移                  \n",
       "2535              5  0.0                                                  0   \n",
       "21457  哔哩哔哩安全应急响应中心  NaN  ><img src=1 onerror=(alert)(2233)>,10000,3,0,0...   \n",
       "21458             6  0.0                                                  0   \n",
       "39013         雾香·楼兰  NaN                                       之前账号：科洛丝琳希停用   \n",
       "39014             5  0.0                                                  0   \n",
       "44168        苍蓝誓约手游  NaN  初次见面！您...就是指挥官大人吗？emmm，跟想象中不太一样呢~总之，请多多指教喽，指挥官...   \n",
       "44169             3  0.0                                                  0   \n",
       "\n",
       "       the_rank  level  jointime  moral  silence birthday  coins  fans_badge  \\\n",
       "2534        NaN    NaN       NaN    NaN      NaN      NaN    NaN         NaN   \n",
       "2535        0.0  10-16       0.0    0.0      0.0      NaN    NaN         2.0   \n",
       "21457       NaN    NaN       NaN    NaN      NaN      NaN    NaN         NaN   \n",
       "21458       0.0  05-05       0.0    1.0      0.0      NaN    NaN         1.0   \n",
       "39013       NaN    NaN       NaN    NaN      NaN      NaN    NaN         NaN   \n",
       "39014       1.0    NaN       0.0    0.0      0.0      NaN    NaN         1.0   \n",
       "44168       NaN    NaN       NaN    NaN      NaN      NaN    NaN         NaN   \n",
       "44169       0.0  03-15       0.0    0.0      0.0      NaN    NaN         0.0   \n",
       "\n",
       "       role title desc  vip_type  vip_status  \n",
       "2534    NaN   NaN  NaN       NaN         NaN  \n",
       "2535    1.0   NaN  NaN       NaN         NaN  \n",
       "21457   NaN   NaN  NaN       NaN         NaN  \n",
       "21458   0.0   NaN  NaN       NaN         NaN  \n",
       "39013   NaN   NaN  NaN       NaN         NaN  \n",
       "39014   0.0   NaN  NaN       NaN         NaN  \n",
       "44168   NaN   NaN  NaN       NaN         NaN  \n",
       "44169   0.0   NaN  NaN       NaN         NaN  "
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show every row where vip_type, the_rank or level is NaN.\n",
    "user_info[user_info[['vip_type', 'the_rank', 'level']].isna().any(axis=1)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[2534, 2535, 21457, 21458, 39013, 39014, 44168, 44169]"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Index labels of the rows where vip_type, the_rank or level is NaN.\n",
    "user_info.loc[user_info[['vip_type', 'the_rank', 'level']].isna().any(axis=1)].index.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove every row where vip_type, the_rank or level is NaN.\n",
    "# dropna works on the missing-value mask directly, so it removes exactly the\n",
    "# flagged rows even if index labels are not unique; inplace=True keeps the\n",
    "# same user_info object that the following cells use.\n",
    "user_info.dropna(subset=['vip_type', 'the_rank', 'level'], inplace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 定义权重规则"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "the_rank: {10000.0, 30000.0, 20000.0, 25000.0}\n",
      "level: {'6', '3', '4', '5'}\n",
      "vip_type: {0.0, 1.0, 2.0}\n"
     ]
    }
   ],
   "source": [
    "# Print the distinct values of the three columns that feed the weight rules.\n",
    "for column in ('the_rank', 'level', 'vip_type'):\n",
    "    print(column + ':', set(user_info[column]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Weight tables: for each column, raw value -> score added to a user's weight.\n",
    "# The integer keys also match the float values stored in the columns, because\n",
    "# equal ints and floats hash alike (10000 == 10000.0).\n",
    "rules = {\n",
    "    'the_rank': {10000: 1, 20000: 2, 25000: 3, 30000: 4},\n",
    "    'level': {'3': 1, '4': 2, '5': 3, '6': 4},\n",
    "    'vip_type': {0: 0, 1: 1, 2: 2},\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "51424\n",
      "51424\n"
     ]
    }
   ],
   "source": [
    "# Check whether user_info.mid contains duplicates: compare the row count with\n",
    "# the number of distinct mids, then print every repeated row.\n",
    "print(len(user_info))\n",
    "print(len(set(user_info.mid)))\n",
    "# duplicated() flags the second and later occurrences of each mid in one\n",
    "# vectorised pass, replacing a Python-level scan of every row.\n",
    "for index, row in user_info[user_info.mid.duplicated()].iterrows():\n",
    "    print()\n",
    "    print(row)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 根据权重增加数据函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "metadata": {},
   "outputs": [],
   "source": [
    "def user_power(data, rules, user_info):\n",
    "    \"\"\"\n",
    "    Repeat each row of data according to the weight of its user.\n",
    "\n",
    "    A user's weight is the sum of the rules scores for that user's the_rank,\n",
    "    level and vip_type values in user_info. A row whose key has no matching\n",
    "    mid in user_info gets weight 1.\n",
    "\n",
    "    params:\n",
    "        data: DataFrame with a 'key' column holding the user id (any value\n",
    "            accepted by int())\n",
    "        rules: dict with 'the_rank', 'level' and 'vip_type' entries, each\n",
    "            mapping a raw column value to an integer score\n",
    "        user_info: DataFrame with 'mid', 'the_rank', 'level' and 'vip_type'\n",
    "            columns\n",
    "    return: new DataFrame with the columns of data, every row repeated\n",
    "        weight times in the original order, indexed 0..n-1\n",
    "    raises: KeyError if a matched user has a value that is missing from rules\n",
    "    \"\"\"\n",
    "    if len(data) == 0:\n",
    "        return pd.DataFrame(columns=data.columns)\n",
    "\n",
    "    # Build the mid -> attributes lookup once instead of scanning user_info\n",
    "    # for every row of data; setdefault keeps the first row of a repeated mid.\n",
    "    attrs_by_mid = {}\n",
    "    for mid, rank, level, vip in zip(user_info['mid'], user_info['the_rank'],\n",
    "                                     user_info['level'], user_info['vip_type']):\n",
    "        attrs_by_mid.setdefault(mid, (rank, level, vip))\n",
    "\n",
    "    weights = []\n",
    "    for key in data['key']:\n",
    "        attrs = attrs_by_mid.get(int(key))\n",
    "        if attrs is None:  # no record for this user in user_info\n",
    "            weights.append(1)\n",
    "            continue\n",
    "        rank, level, vip = attrs\n",
    "        # Scores are looked up only for users present in data, so an unmapped\n",
    "        # value elsewhere in user_info does not raise.\n",
    "        weights.append(rules['the_rank'][rank]\n",
    "                       + rules['level'][level]\n",
    "                       + rules['vip_type'][vip])\n",
    "\n",
    "    # Repeat row positions in a single step. DataFrame.append was removed in\n",
    "    # pandas 2.0 and copied the whole frame on every call; selecting by\n",
    "    # position also keeps the column dtypes of data.\n",
    "    positions = np.repeat(np.arange(len(data)), np.asarray(weights, dtype=int))\n",
    "    return data.iloc[positions].reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 函数测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>db</th>\n",
       "      <th>key</th>\n",
       "      <th>size</th>\n",
       "      <th>ttl</th>\n",
       "      <th>type</th>\n",
       "      <th>value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>7</td>\n",
       "      <td>330817737</td>\n",
       "      <td>6</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[132112]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7</td>\n",
       "      <td>321510739</td>\n",
       "      <td>9</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[6463, 23352]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>7</td>\n",
       "      <td>27209511</td>\n",
       "      <td>20</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[425, 2580, 5069, 6446, 11712]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>7</td>\n",
       "      <td>24774761</td>\n",
       "      <td>12</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[5550, 5849, 5852]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>7</td>\n",
       "      <td>16686749</td>\n",
       "      <td>259</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[53, 710, 835, 844, 1547, 3365, 3398, 3494, 43...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   db        key  size  ttl type  \\\n",
       "0   7  330817737     6   -1  set   \n",
       "1   7  321510739     9   -1  set   \n",
       "2   7   27209511    20   -1  set   \n",
       "3   7   24774761    12   -1  set   \n",
       "4   7   16686749   259   -1  set   \n",
       "\n",
       "                                               value  \n",
       "0                                           [132112]  \n",
       "1                                      [6463, 23352]  \n",
       "2                     [425, 2580, 5069, 6446, 11712]  \n",
       "3                                 [5550, 5849, 5852]  \n",
       "4  [53, 710, 835, 844, 1547, 3365, 3398, 3494, 43...  "
      ]
     },
     "execution_count": 163,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Test dataset: the first rows of data.\n",
    "test_data = data.head() \n",
    "test_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>mid</th>\n",
       "      <th>name</th>\n",
       "      <th>sex</th>\n",
       "      <th>sign</th>\n",
       "      <th>the_rank</th>\n",
       "      <th>level</th>\n",
       "      <th>jointime</th>\n",
       "      <th>moral</th>\n",
       "      <th>silence</th>\n",
       "      <th>birthday</th>\n",
       "      <th>coins</th>\n",
       "      <th>fans_badge</th>\n",
       "      <th>role</th>\n",
       "      <th>title</th>\n",
       "      <th>desc</th>\n",
       "      <th>vip_type</th>\n",
       "      <th>vip_status</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>17515174</td>\n",
       "      <td>试图让群主女装</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>02-24</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>7596516</td>\n",
       "      <td>酒客小丑</td>\n",
       "      <td>NaN</td>\n",
       "      <td>沉迷于吃狗粮中。商业合作请加QQ2375432747  微博：@酒客小丑</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>06-15</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>bilibili 2018百大UP主、高能联盟成员</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>10769575</td>\n",
       "      <td>文史阁</td>\n",
       "      <td>NaN</td>\n",
       "      <td>地势坤，君子以厚德载物。</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>01-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>bilibili 知名科普UP主</td>\n",
       "      <td>代表作：中国历代帝王合集</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>22989278</td>\n",
       "      <td>果冻萌萌的</td>\n",
       "      <td>NaN</td>\n",
       "      <td>林正英以及香港经典鬼片僵尸片24小时播放 房间号：193  欢迎大家观看</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>09-17</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>bilibili 知名UP主</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>130050712</td>\n",
       "      <td>初阳手绘石头</td>\n",
       "      <td>NaN</td>\n",
       "      <td>如果搜不到我，说明你是在查找你的好友里搜，当然搜不到了。因为我还不是你的好友呢，你要在添＋好...</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>01-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41850</th>\n",
       "      <td>41856</td>\n",
       "      <td>330817737</td>\n",
       "      <td>派大排</td>\n",
       "      <td>NaN</td>\n",
       "      <td>这个人懒死了，什么都没有写~o(〃'▽'〃)o</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10-11</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30606</th>\n",
       "      <td>30613</td>\n",
       "      <td>321510739</td>\n",
       "      <td>狐妖枖仸殀</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11-30</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5499</th>\n",
       "      <td>5499</td>\n",
       "      <td>27209511</td>\n",
       "      <td>frank不是什么大神啦</td>\n",
       "      <td>NaN</td>\n",
       "      <td>专注于鬼畜和生活，想要成为大剪辑师，加油，新粉丝群还是遇到状况了，具体开放时间请耐心等待～</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>02-17</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39740</th>\n",
       "      <td>39746</td>\n",
       "      <td>24774761</td>\n",
       "      <td>乐小昭</td>\n",
       "      <td>NaN</td>\n",
       "      <td>失踪人口一位……  微博：@乐小昭</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>01-01</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1443</th>\n",
       "      <td>1444</td>\n",
       "      <td>16686749</td>\n",
       "      <td>海棠花丶</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>04-14</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          id        mid          name  sex  \\\n",
       "0          1   17515174       试图让群主女装  NaN   \n",
       "1          2    7596516          酒客小丑  NaN   \n",
       "2          3   10769575           文史阁  NaN   \n",
       "3          4   22989278         果冻萌萌的  NaN   \n",
       "4          5  130050712        初阳手绘石头  NaN   \n",
       "41850  41856  330817737           派大排  NaN   \n",
       "30606  30613  321510739         狐妖枖仸殀  NaN   \n",
       "5499    5499   27209511  frank不是什么大神啦  NaN   \n",
       "39740  39746   24774761           乐小昭  NaN   \n",
       "1443    1444   16686749          海棠花丶  NaN   \n",
       "\n",
       "                                                    sign  the_rank level  \\\n",
       "0                                                    NaN   10000.0     5   \n",
       "1                   沉迷于吃狗粮中。商业合作请加QQ2375432747  微博：@酒客小丑   10000.0     6   \n",
       "2                                           地势坤，君子以厚德载物。   10000.0     6   \n",
       "3                   林正英以及香港经典鬼片僵尸片24小时播放 房间号：193  欢迎大家观看   10000.0     6   \n",
       "4      如果搜不到我，说明你是在查找你的好友里搜，当然搜不到了。因为我还不是你的好友呢，你要在添＋好...   10000.0     5   \n",
       "41850                            这个人懒死了，什么都没有写~o(〃'▽'〃)o   10000.0     3   \n",
       "30606                                                NaN   10000.0     4   \n",
       "5499       专注于鬼畜和生活，想要成为大剪辑师，加油，新粉丝群还是遇到状况了，具体开放时间请耐心等待～   10000.0     5   \n",
       "39740                                  失踪人口一位……  微博：@乐小昭   10000.0     3   \n",
       "1443                                                 NaN   10000.0     5   \n",
       "\n",
       "       jointime  moral  silence birthday  coins  fans_badge  role  \\\n",
       "0           0.0    0.0      0.0    02-24    0.0         0.0   0.0   \n",
       "1           0.0    0.0      0.0    06-15    0.0         1.0   1.0   \n",
       "2           0.0    0.0      0.0    01-01    0.0         0.0   1.0   \n",
       "3           0.0    0.0      0.0    09-17    0.0         1.0   1.0   \n",
       "4           0.0    0.0      0.0    01-01    0.0         1.0   0.0   \n",
       "41850       0.0    0.0      0.0    10-11    0.0         0.0   0.0   \n",
       "30606       0.0    0.0      0.0    11-30    0.0         0.0   0.0   \n",
       "5499        0.0    0.0      0.0    02-17    0.0         1.0   0.0   \n",
       "39740       0.0    0.0      0.0    01-01    0.0         0.0   0.0   \n",
       "1443        0.0    0.0      0.0    04-14    0.0         0.0   0.0   \n",
       "\n",
       "                           title          desc  vip_type  vip_status  \n",
       "0                            NaN           NaN       2.0         1.0  \n",
       "1      bilibili 2018百大UP主、高能联盟成员           NaN       2.0         1.0  \n",
       "2               bilibili 知名科普UP主  代表作：中国历代帝王合集       0.0         0.0  \n",
       "3                 bilibili 知名UP主           NaN       1.0         0.0  \n",
       "4                            NaN           NaN       2.0         1.0  \n",
       "41850                        NaN           NaN       0.0         0.0  \n",
       "30606                        NaN           NaN       1.0         0.0  \n",
       "5499                         NaN           NaN       2.0         1.0  \n",
       "39740                        NaN           NaN       0.0         0.0  \n",
       "1443                         NaN           NaN       2.0         1.0  "
      ]
     },
     "execution_count": 164,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Test user-info dataset: the first five rows of user_info plus the rows of\n",
    "# every user whose mid matches a key in test_data (kept in test_data order).\n",
    "# The pieces are gathered first and concatenated once: DataFrame.append in a\n",
    "# loop re-copies the frame on every iteration and was removed in pandas 2.0.\n",
    "test_user_info = pd.concat(\n",
    "    [user_info.head()]\n",
    "    + [user_info[user_info.mid == int(key)] for key in test_data['key']]\n",
    ")\n",
    "test_user_info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>db</th>\n",
       "      <th>key</th>\n",
       "      <th>size</th>\n",
       "      <th>ttl</th>\n",
       "      <th>type</th>\n",
       "      <th>value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>7</td>\n",
       "      <td>330817737</td>\n",
       "      <td>6</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[132112]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7</td>\n",
       "      <td>330817737</td>\n",
       "      <td>6</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[132112]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>7</td>\n",
       "      <td>321510739</td>\n",
       "      <td>9</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[6463, 23352]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>7</td>\n",
       "      <td>321510739</td>\n",
       "      <td>9</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[6463, 23352]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>7</td>\n",
       "      <td>321510739</td>\n",
       "      <td>9</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[6463, 23352]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>7</td>\n",
       "      <td>321510739</td>\n",
       "      <td>9</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[6463, 23352]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>27209511</td>\n",
       "      <td>20</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[425, 2580, 5069, 6446, 11712]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>27209511</td>\n",
       "      <td>20</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[425, 2580, 5069, 6446, 11712]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>7</td>\n",
       "      <td>27209511</td>\n",
       "      <td>20</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[425, 2580, 5069, 6446, 11712]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>7</td>\n",
       "      <td>27209511</td>\n",
       "      <td>20</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[425, 2580, 5069, 6446, 11712]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>7</td>\n",
       "      <td>27209511</td>\n",
       "      <td>20</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[425, 2580, 5069, 6446, 11712]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>7</td>\n",
       "      <td>27209511</td>\n",
       "      <td>20</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[425, 2580, 5069, 6446, 11712]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>7</td>\n",
       "      <td>24774761</td>\n",
       "      <td>12</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[5550, 5849, 5852]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>7</td>\n",
       "      <td>24774761</td>\n",
       "      <td>12</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[5550, 5849, 5852]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>7</td>\n",
       "      <td>16686749</td>\n",
       "      <td>259</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[53, 710, 835, 844, 1547, 3365, 3398, 3494, 43...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>7</td>\n",
       "      <td>16686749</td>\n",
       "      <td>259</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[53, 710, 835, 844, 1547, 3365, 3398, 3494, 43...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>7</td>\n",
       "      <td>16686749</td>\n",
       "      <td>259</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[53, 710, 835, 844, 1547, 3365, 3398, 3494, 43...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>7</td>\n",
       "      <td>16686749</td>\n",
       "      <td>259</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[53, 710, 835, 844, 1547, 3365, 3398, 3494, 43...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>7</td>\n",
       "      <td>16686749</td>\n",
       "      <td>259</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[53, 710, 835, 844, 1547, 3365, 3398, 3494, 43...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>7</td>\n",
       "      <td>16686749</td>\n",
       "      <td>259</td>\n",
       "      <td>-1</td>\n",
       "      <td>set</td>\n",
       "      <td>[53, 710, 835, 844, 1547, 3365, 3398, 3494, 43...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   db        key size ttl type  \\\n",
       "0   7  330817737    6  -1  set   \n",
       "1   7  330817737    6  -1  set   \n",
       "2   7  321510739    9  -1  set   \n",
       "3   7  321510739    9  -1  set   \n",
       "4   7  321510739    9  -1  set   \n",
       "5   7  321510739    9  -1  set   \n",
       "6   7   27209511   20  -1  set   \n",
       "7   7   27209511   20  -1  set   \n",
       "8   7   27209511   20  -1  set   \n",
       "9   7   27209511   20  -1  set   \n",
       "10  7   27209511   20  -1  set   \n",
       "11  7   27209511   20  -1  set   \n",
       "12  7   24774761   12  -1  set   \n",
       "13  7   24774761   12  -1  set   \n",
       "14  7   16686749  259  -1  set   \n",
       "15  7   16686749  259  -1  set   \n",
       "16  7   16686749  259  -1  set   \n",
       "17  7   16686749  259  -1  set   \n",
       "18  7   16686749  259  -1  set   \n",
       "19  7   16686749  259  -1  set   \n",
       "\n",
       "                                                value  \n",
       "0                                            [132112]  \n",
       "1                                            [132112]  \n",
       "2                                       [6463, 23352]  \n",
       "3                                       [6463, 23352]  \n",
       "4                                       [6463, 23352]  \n",
       "5                                       [6463, 23352]  \n",
       "6                      [425, 2580, 5069, 6446, 11712]  \n",
       "7                      [425, 2580, 5069, 6446, 11712]  \n",
       "8                      [425, 2580, 5069, 6446, 11712]  \n",
       "9                      [425, 2580, 5069, 6446, 11712]  \n",
       "10                     [425, 2580, 5069, 6446, 11712]  \n",
       "11                     [425, 2580, 5069, 6446, 11712]  \n",
       "12                                 [5550, 5849, 5852]  \n",
       "13                                 [5550, 5849, 5852]  \n",
       "14  [53, 710, 835, 844, 1547, 3365, 3398, 3494, 43...  \n",
       "15  [53, 710, 835, 844, 1547, 3365, 3398, 3494, 43...  \n",
       "16  [53, 710, 835, 844, 1547, 3365, 3398, 3494, 43...  \n",
       "17  [53, 710, 835, 844, 1547, 3365, 3398, 3494, 43...  \n",
       "18  [53, 710, 835, 844, 1547, 3365, 3398, 3494, 43...  \n",
       "19  [53, 710, 835, 844, 1547, 3365, 3398, 3494, 43...  "
      ]
     },
     "execution_count": 165,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Run user_power (defined in an earlier cell) on the test data, the rules and\n",
    "# the test user-info frame, then display the returned frame.\n",
    "new = user_power(\n",
    "    test_data,\n",
    "    rules,\n",
    "    test_user_info,\n",
    ")\n",
    "new"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可以看到，根据权重，相应的数据重复了多次"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
